#----------------------------------------------
# Measuring Misperceptions: Limits of Party-Specific Stereotype Reports
# Orr and Huber, 2021, POQ
# Inputs: OrrHuber_POQ_Study1_clean.csv and publicly available national survey files
# Outputs: Replication of Study 1
# Figures 1, 2, 3, 4, A6
# Table A1
#----------------------------------------------


#----------------------------------------------
# Load Packages
#----------------------------------------------

library(readstata13)
library(dplyr)
library(reshape2)
library(ggplot2)
library(estimatr)

#----------------------------------------------
# Load Data
#----------------------------------------------

# Clean study 1 data is posted alongside this script

dat <- read.csv("OrrHuber_POQ_Study1_clean.csv", as.is = TRUE)


# National survey data is available online, and is not re-archived alongside this 
# script - see README for access instructions

cc <- read.dta13("CCES18_Common_OUTPUT_vv_topost.dta", nonint.factors = TRUE)
anes <- read.dta13("anes_timeseries_2016_Stata13.dta", nonint.factors = TRUE)
gss <- read.dta13("gss.dta", nonint.factors = TRUE)


#----------------------------------------------
# Estimate party traits from CCES data
#----------------------------------------------

cc$pid2 <- recode(cc$pid3, .default = NA_character_,
                  "Democrat" = "Democrat", "Republican" = "Republican")

cc$race <- recode(cc$race, "White" = "White", "Black" = "Black", .default = "Hisp_other")

cc$relig <- recode(cc$religpew, "Jewish" = "Jewish", "Atheist" = "Atheist_None", 
      "Agnostic"  = "Atheist_None", "Nothing in particular"  = "Atheist_None",
      "Muslim" = "Other", "Buddhist" = "Other", "Hindu" = "Other", .default = "Christian")

cces_rate_pid <- group_by(cc[!is.na(cc$pid2),], pid2) %>% 
  summarize(rel_chr = weighted.mean(relig == "Christian", w = commonweight, na.rm = TRUE),
            rel_jew = weighted.mean(relig == "Jewish", w = commonweight, na.rm = TRUE),
            rel_ath = weighted.mean(relig == "Atheist_None", w = commonweight, na.rm = TRUE),
            rel_oth = weighted.mean(relig == "Other", w = commonweight, na.rm = TRUE),
            rac_whi = weighted.mean(race == "White", w = commonweight, na.rm = TRUE),
            rac_bla = weighted.mean(race == "Black", w = commonweight, na.rm = TRUE),
            rac_oth = weighted.mean(race == "Hisp_other", w = commonweight, na.rm = TRUE)) %>% 
  t()
colnames(cces_rate_pid) <- cces_rate_pid[1,]
cces_rate_pid <- data.frame(apply(cces_rate_pid[-1,], 2, function(x) round(100*as.numeric(x), 2)), 
                        row.names = row.names(cces_rate_pid[-1,]))
cces_rate_pid$Label <- c("Christian", "Jewish", "Atheist, not religious", "Other religion",
                         "White", "Black", "Hispanic, Asian, other race")


#----------------------------------------------
# Estimate party traits from ANES data
#----------------------------------------------

anes$pid2 <- recode(anes$V161158x, .default = NA_character_,
                  "1. Strong Democrat" = "D", "2. Not very strong Democract" = "D", "3. Independent-Democrat" = "D",
                  "7. Strong Republican" = "R", "6. Not very strong Republican" = "R", "5. Independent-Republican" = "R")

anes$race <- recode(anes$V161310x, .default = NA_character_,
                    "1. White, non-Hispanic" = "White, non-Hispanic",
                    "2. Black, non-Hispanic" = "Black, non-Hispanic", 
                    "3. Asian, native Hawaiian or other Pacif Islr,non-Hispanic" = "Asian, Hispanic, other",
                    "4. Native American or Alaska Native, non-Hispanic" = "Asian, Hispanic, other",
                    "5. Hispanic" = "Asian, Hispanic, other",
                    "6. Other non-Hispanic incl multiple races [WEB: blank 'Other' counted as a race]" = "Asian, Hispanic, other")

anes$relig <- recode(anes$V161265x, .default = NA_character_,
                     "1. Mainline Protestant" = "Christian", "2. Evangelical Protestant" = "Christian",
                     "3. Black Protestant" = "Christian", "4. Roman Catholic" = "Christian",
                     "5. Undifferentiated Christian" = "Christian", "6. Jewish" = "Jewish",
                     "7. Other religion" = "Other", "8. Not religious" = "Athiest or not religious")

anes_rates <- group_by(anes[!is.na(anes$pid2),], pid2) %>% 
  summarize(white = weighted.mean(race == "White, non-Hispanic", w = V160102, na.rm = TRUE)*100,
            black = weighted.mean(race == "Black, non-Hispanic", w = V160102, na.rm = TRUE)*100,
            other = weighted.mean(race == "Asian, Hispanic, other", w = V160102, na.rm = TRUE)*100,
            christian = weighted.mean(relig == "Christian", w = V160102, na.rm = TRUE)*100,
            jewish = weighted.mean(relig == "Jewish", w = V160102, na.rm = TRUE)*100,
            athiest = weighted.mean(relig == "Athiest or not religious", w = V160102, na.rm = TRUE)*100,
            other_rel = weighted.mean(relig == "Other", w = V160102, na.rm = TRUE)*100)


#----------------------------------------------
# Estimate party traits from GSS data
#----------------------------------------------

gss$pid2 <- recode(gss$partyid, .default = NA_character_,
                  "Ind,near dem" = "D", "Not str democrat" = "D", "Strong democrat" = "D", 
                  "Ind,near rep" = "R", "Not str republican" = "R", "Strong republican" = "R")

gss$race_recode <- as.character(gss$race)
gss$race_recode[gss$hispanic %in% 2:50] <- "Other"

gss$relig_recode <- recode(gss$relig, .default = NA_character_,
                           "Protestant" = "Christian", "Catholic" = "Christian", 
                           "Jewish" = "Jewish", "None" = "Athiest or not religious",
                           "Buddhism" = "Other", "Hinduism" = "Other", "Other eastern" = "Other", 
                           "Moslem/islam" = "Other", "Orthodox-christian" = "Christian", "Christian" = "Christian",
                           "Native american" = "Other", "Inter-nondenominational" = "Other")

gss_rates <- group_by(gss[!is.na(gss$pid2) & gss$year == 2018,], pid2) %>% 
  summarize(white = weighted.mean(race_recode == "White", w = wtssall, na.rm = TRUE)*100,
            black = weighted.mean(race_recode == "Black", w = wtssall, na.rm = TRUE)*100,
            other = weighted.mean(race_recode == "Other", w = wtssall, na.rm = TRUE)*100,
            christian = weighted.mean(relig_recode == "Christian", w = wtssall, na.rm = TRUE)*100,
            jewish = weighted.mean(relig_recode == "Jewish", w = wtssall, na.rm = TRUE)*100,
            athiest = weighted.mean(relig_recode == "Athiest or not religious", w = wtssall, na.rm = TRUE)*100,
            other_rel = weighted.mean(relig_recode == "Other", w = wtssall, na.rm = TRUE)*100)


#----------------------------------------------
# Figure 1: Average perceptions of racial and ethnic composition of each party
#----------------------------------------------

ave_infer_unweighted <- function(data = dat, subgroup_var = NULL, subset_val = NULL){
  if(is.null(subgroup_var) | is.null(subset_val)){
    temp <- group_by(data, pid_vignette)
  } else {
    temp <- group_by(data[data[,subgroup_var] %in% subset_val,], pid_vignette)
  }
  temp <- dplyr::summarize(temp,
            rel_chr = mean(infer_rel_chr, na.rm = TRUE),
            rel_jew = mean(infer_rel_jew, na.rm = TRUE),
            rel_ath = mean(infer_rel_ath, na.rm = TRUE),
            rel_oth = mean(infer_rel_oth, na.rm = TRUE),
            rac_whi = mean(infer_rac_whi, na.rm = TRUE),
            rac_bla = mean(infer_rac_bla, na.rm = TRUE),
            rac_oth = mean(infer_rac_oth, na.rm = TRUE)) %>% 
    t()
  colnames(temp) <- temp[1,]
  temp <- data.frame(apply(temp[-1,], 2, function(x) round(as.numeric(x), 2)), 
                        row.names = row.names(temp[-1,]))
  temp$Label <- c("Christian", "Jewish", "Atheist, not religious", "Other religion",
                  "White", "Black", "Hispanic, Asian, other race")
  temp_merged <- rbind(temp, cces_rate_pid)
  temp_merged$Source <- c(rep("Inferences", 7), rep("CCES", 7))
  temp_merged$Area <- rep(c(rep("Religion", 4), rep("Race", 3)), 2)
  return(temp_merged)
}

infer_dem <- ave_infer_unweighted(subgroup_var = "pid3", subset_val = "Democrat")
infer_rep <- ave_infer_unweighted(subgroup_var = "pid3", subset_val = "Republican")
infer_full <- ave_infer_unweighted()

data.frame(rbind(melt(infer_rep[infer_rep$Area == "Race",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party"),
                 melt(infer_dem[infer_dem$Area == "Race" & infer_dem$Source == "Inferences",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party"),
                 melt(infer_full[infer_full$Area == "Race" & infer_full$Source == "Inferences",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party")),
           Party_name = c(rep("Democratic Party", 6), rep("Republican Party", 6),
                          rep("Democratic Party", 3), rep("Republican Party", 3),
                          rep("Democratic Party", 3), rep("Republican Party", 3)), 
           Source_bypid = factor(c(rep("Republicans'\nPerceptions", 3), rep("Party Demographics \n(CCES)", 3),
                                   rep("Republicans'\nPerceptions", 3), rep("Party Demographics \n(CCES)", 3),
                                   rep("Democrats'\nPerceptions", 6), rep("Public \nPerceptions", 6)),
                             levels = c("Democrats'\nPerceptions", "Republicans'\nPerceptions", "Public \nPerceptions", "Party Demographics \n(CCES)"))) %>% 
   mutate(Label_2 = factor(rep(c("White", "Black", "Hispanic, Asian, other race"), 8),
                          levels = c("Hispanic, Asian, other race", "Black", "White"))) %>% 
ggplot(aes(y = Mean, x = Source_bypid)) +
  facet_wrap(vars(Party_name), nrow = 2) +
  geom_bar(aes(fill = Label_2), stat = "identity", position = "stack") +
  geom_text(aes(label = round(Mean, 0)), position = position_stack(vjust = 0.5), size = 3.5) +
  scale_fill_manual(values = c("pink1", "plum3", "slateblue3"), guide = guide_legend(reverse = TRUE)) +
  labs(y = "Likelihood", x = "", fill = "Race and Ethnicity:") + 
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 100, by = 10)) +
  theme_bw() +
  theme(legend.position = "bottom", strip.background = element_rect(fill = "white"))
ggsave(file = "Figure1.tiff", width = 7.5, height = 5)


#----------------------------------------------
# Figure 2: Average perceptions of religious composition of each party
#----------------------------------------------

data.frame(rbind(melt(infer_rep[infer_rep$Area == "Religion",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party"),
                 melt(infer_dem[infer_dem$Area == "Religion" & infer_dem$Source == "Inferences",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party"),
                 melt(infer_full[infer_full$Area == "Religion" & infer_dem$Source == "Inferences",], measure.vars = c("Democrat", "Republican"),
                       value.name = "Mean", variable.name = "Party")),
           Party_name = c(rep("Democratic Party", 8), rep("Republican Party", 8),
                             rep("Democratic Party", 4), rep("Republican Party", 4),
                          rep("Democratic Party", 4), rep("Republican Party", 4)), 
           Source_bypid = factor(c(rep("Republicans'\nPerceptions", 4), rep("Party Demographics \n(CCES)", 4),
                             rep("Republicans'\nPerceptions", 4), rep("Party Demographics \n(CCES)", 4),
                             rep("Democrats'\nPerceptions", 8), rep("Public \nPerceptions", 8)),
                             levels = c("Democrats'\nPerceptions", "Republicans'\nPerceptions", "Public \nPerceptions", "Party Demographics \n(CCES)"))) %>% 
  mutate(Label = factor(Label, levels = c("Other religion", "Jewish", "Christian", "Atheist, not religious"))) %>% 
ggplot(aes(y = Mean, x = Source_bypid, fill = Label)) +
  facet_wrap(vars(Party_name), nrow = 2) +
  geom_bar(stat = "identity") + 
  geom_text(aes(label = round(Mean, 0)), position = position_stack(vjust = 0.5), size = 3.5) +
  scale_fill_manual(values = c("pink1", "pink3", "mediumpurple2", "slateblue3"), 
                    guide = guide_legend(reverse = TRUE)) +
  labs(y = "Likelihood", x = "", fill = "Religion:") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 100, by = 10), labels = seq(0, 100, by = 10)) +
  theme_bw() +
  theme(legend.position = "bottom", strip.background = element_rect(fill = "white"))
ggsave(file = "Figure2.tiff", width = 7.5, height = 5)


#----------------------------------------------
# Figure 3: Effect of vignette pid on perceived traits
#----------------------------------------------

dat$pid_vignette <- factor(dat$pid_vignette, levels = c("Republican", "Democrat"))

gaps_plotdat <- cbind(
  data.frame(rbind(
    summary(lm_robust(infer_rel_chr ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_jew ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_ath ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_oth ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_whi ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_bla ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_oth ~ pid_vignette, data = dat))$coefficients[2, 1:2],
    cbind(cces_rate_pid$Democrat - cces_rate_pid$Republican, NA)
  )),
  Comparison = "R vs D",
  Area = factor(rep(cces_rate_pid$Label, 2), 
                levels = cces_rate_pid$Label[c(3, 4, 2, 1, 7, 6, 5)]),
  Source = c(rep("Average Estimate", 7), rep("Approximate Truth", 7))
)

ggplot(gaps_plotdat, aes(y = Estimate, x = Area, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  ylim(-38, 38) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  labs(x = "", y = "Difference: Democrat - Republican") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin())
ggsave(file = "Figure3.tiff", height = 5, width = 5.5)


#----------------------------------------------
# Figure 4: Effect of vignette pid on perceived traits 
# among respondents who pass attention/comprehension check
#----------------------------------------------

gaps_plotdat_pass <- cbind(
  data.frame(rbind(
    summary(lm_robust(infer_rel_chr ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_jew ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_ath ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rel_oth ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_whi ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_bla ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    summary(lm_robust(infer_rac_oth ~ pid_vignette, data = dat[dat$manip_pid100,]))$coefficients[2, 1:2],
    cbind(cces_rate_pid$Democrat - cces_rate_pid$Republican, NA)
  )),
  Comparison = "R vs D",
  Area = factor(rep(cces_rate_pid$Label, 2), 
                levels = cces_rate_pid$Label[c(3, 4, 2, 1, 7, 6, 5)]),
  Source = c(rep("Average Estimate", 7), rep("Approximate Truth", 7))
)

ggplot(gaps_plotdat_pass, aes(y = Estimate, x = Area, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  ylim(-38, 38) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  labs(x = "", y = "Difference: Democrat - Republican") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin())
ggsave(file = "Figure4.tiff", height = 5, width = 5.5)


#----------------------------------------------
# Table A1: Study 1 Survey Sample
#----------------------------------------------

round(prop.table(table(dat$gender)), 2)

2018 - quantile(dat$birthyr, .5, na.rm = TRUE)

round(prop.table(table(dat$race)), 2)

round(mean(grepl("Republican", dat$pid7)), 2)
round(mean(grepl("Democrat", dat$pid7)), 2)
round(mean(grepl("Independent|Not sure", dat$pid7)), 2)


#----------------------------------------------
# Figure A6: Effect of vignette pid relative to ANES and GSS baseline
#----------------------------------------------

pdf("FigureA6.pdf", height = 5.2, width = 5.5)
rbind(gaps_plotdat,
      data.frame(Estimate = unlist(c(c(anes_rates[1, -1] - anes_rates[2, -1]), 
                                     c(gss_rates[1, -1] - gss_rates[2, -1]))),
                 Std..Error = NA,
                 Comparison = "R vs D",
                 Area = factor(rep(c("White", "Black", "Hispanic, Asian, other race", 
                                     "Christian", "Jewish", "Atheist, not religious", "Other religion"), 2)),
                 Source = c(rep("Approximate Truth (ANES)", 7), rep("Approximate Truth (GSS)", 7)))) %>% 
  mutate(Source = recode(Source, "Approximate Truth" = "Approximate Truth (CCES)")) %>% 
ggplot(aes(y = Estimate, x = Area, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  ylim(-38, 38) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  labs(x = "", y = "Difference: Democrat - Republican") +
  coord_flip() +
  guides(shape = guide_legend(nrow = 2)) +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(), 
        legend.text = element_text(size = 7))
dev.off()
